In [11]:
#1 st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

# Load and prepare data
df = pd.read_csv(r"C:\Users\Deepthi P B\Downloads\housing (1).csv")
df['total_bedrooms'] = df['total_bedrooms'].fillna(df['total_bedrooms'].median())
df.iloc[:,2:7] = df.iloc[:,2:7].astype(int)

# Data overview
print(f"Dataset Shape: {df.shape}\n")
print("Basic Statistics:\n", df.describe().T)
print("\nMissing Values:\n", df.isnull().sum())
print("\nDuplicates:", df.duplicated().sum())

Numerical = df.select_dtypes(include=[np.number]).columns
print(Numerical)
for col in Numerical:
    plt.figure(figsize=(10,6))
    df[col].plot(kind= 'hist', title =col, bins = 60, edgecolor='black')
    plt.show()
for col in Numerical:
    plt.figure(figsize=(6,6))
    sns.boxplot(df[col],color='red')
    plt.title(col)
    plt.ylabel(col)
    plt.show()
Dataset Shape: (20640, 10)

Basic Statistics:
                       count           mean            std         min  \
longitude           20640.0    -119.569704       2.003532   -124.3500   
latitude            20640.0      35.631861       2.135952     32.5400   
housing_median_age  20640.0      28.639486      12.585558      1.0000   
total_rooms         20640.0    2635.763081    2181.615252      2.0000   
total_bedrooms      20640.0     536.838857     419.391878      1.0000   
population          20640.0    1425.476744    1132.462122      3.0000   
households          20640.0     499.539680     382.329753      1.0000   
median_income       20640.0       3.870671       1.899822      0.4999   
median_house_value  20640.0  206855.816909  115395.615874  14999.0000   

                            25%          50%           75%          max  
longitude             -121.8000    -118.4900    -118.01000    -114.3100  
latitude                33.9300      34.2600      37.71000      41.9500  
housing_median_age      18.0000      29.0000      37.00000      52.0000  
total_rooms           1447.7500    2127.0000    3148.00000   39320.0000  
total_bedrooms         297.0000     435.0000     643.25000    6445.0000  
population             787.0000    1166.0000    1725.00000   35682.0000  
households             280.0000     409.0000     605.00000    6082.0000  
median_income            2.5634       3.5348       4.74325      15.0001  
median_house_value  119600.0000  179700.0000  264725.00000  500001.0000  

Missing Values:
 longitude             0
latitude              0
housing_median_age    0
total_rooms           0
total_bedrooms        0
population            0
households            0
median_income         0
median_house_value    0
ocean_proximity       0
dtype: int64

Duplicates: 0
Index(['longitude', 'latitude', 'housing_median_age', 'total_rooms',
       'total_bedrooms', 'population', 'households', 'median_income',
       'median_house_value'],
      dtype='object')
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
In [14]:
#2nd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_california_housing

# Load and visualize data
data = fetch_california_housing(as_frame=True).frame
plt.figure(figsize=(10, 8))
sns.heatmap(data.corr(), annot=True, cmap='coolwarm', fmt='.2f', linewidths=.5)
plt.title('Feature Correlation Matrix')
plt.show()

sns.pairplot(data, diag_kind='kde', plot_kws={'alpha': 0.5})
plt.suptitle('Feature Pair Relationships', y=1.02)
plt.show()
No description has been provided for this image
No description has been provided for this image
In [4]:
#3rd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.decomposition import PCA

# Load and transform data
iris = load_iris()
data_2d = PCA(n_components=2).fit_transform(iris.data)

# Plot
plt.figure(figsize=(8, 6))
for i, name in enumerate(iris.target_names):
    plt.scatter(data_2d[iris.target==i, 0], data_2d[iris.target==i, 1], 
                label=name, c=['r','g','b'][i])
plt.title('PCA on Iris Dataset')
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
plt.legend()
plt.grid()
plt.show()
No description has been provided for this image
In [3]:
#4th
import pandas as pd

def find_s(file_path):
    data = pd.read_csv(file_path)
    print("Training data:\n", data)
    
    first_positive = data[data.iloc[:, -1] == 'Yes'].iloc[0]
    h = list(first_positive.iloc[:-1]) 
    
    for _, r in data[data.iloc[:, -1] == 'Yes'].iloc[1:].iterrows():
        h = [hv if hv == rv else '?' for hv, rv in zip(h, r.iloc[:-1])]
    
    return h

path = r"C:\Users\Deepthi P B\Downloads\training_data.csv"
final_h = find_s(path)
print("\nFinal hypothesis:", final_h)
Training data:
     Outlook Temperature Humidity  Windy PlayTennis
0     Sunny         Hot     High  False         No
1     Sunny         Hot     High   True         No
2  Overcast         Hot     High  False        Yes
3      Rain        Cold     High  False        Yes
4      Rain        Cold     High   True         No
5  Overcast         Hot     High   True        Yes
6     Sunny         Hot     High  False         No

Final hypothesis: ['?', '?', 'High', '?']
In [5]:
#5th
import numpy as np, matplotlib.pyplot as plt
from collections import Counter

data = np.random.rand(100)
train, test = data[:50], data[50:]
labels = np.where(train <= 0.5, "Class1", "Class2")

def knn(x, k=3):
    nearest = labels[np.argpartition(np.abs(train - x), k)[:k]]
    return Counter(nearest).most_common(1)[0][0]

for k in [1, 2, 3, 4, 5, 20, 30]:
    res = [knn(x, k) for x in test]
    print(f"\nk={k} results:")
    [print(f"x{i+51}: {v:.3f}→{l}") for i, (v, l) in enumerate(zip(test, res))]
    
    plt.figure(figsize=(10, 3))
    for c, l in [('b', 'Class1'), ('r', 'Class2')]:
        plt.scatter(train[labels==l], [0]*sum(labels==l), c=c, marker='o')
        plt.scatter(test[np.array(res)==l], [1]*sum(np.array(res)==l), c=c, marker='x')
    plt.yticks([0,1], ['Train','Test'])
    plt.title(f'k={k} Classification')
    plt.show()
k=1 results:
x51: 0.617→Class2
x52: 0.453→Class1
x53: 0.241→Class1
x54: 0.225→Class1
x55: 0.026→Class1
x56: 0.012→Class1
x57: 0.074→Class1
x58: 0.449→Class1
x59: 0.906→Class2
x60: 0.612→Class2
x61: 0.317→Class1
x62: 0.713→Class2
x63: 0.296→Class1
x64: 0.733→Class2
x65: 0.975→Class2
x66: 0.193→Class1
x67: 0.538→Class2
x68: 0.052→Class1
x69: 0.833→Class2
x70: 0.186→Class1
x71: 0.775→Class2
x72: 0.501→Class1
x73: 0.011→Class1
x74: 0.339→Class1
x75: 0.669→Class2
x76: 0.825→Class2
x77: 0.620→Class2
x78: 0.926→Class2
x79: 0.030→Class1
x80: 0.628→Class2
x81: 0.907→Class2
x82: 0.307→Class1
x83: 0.615→Class2
x84: 0.747→Class2
x85: 0.350→Class1
x86: 0.709→Class2
x87: 0.399→Class1
x88: 0.579→Class2
x89: 0.247→Class1
x90: 0.611→Class2
x91: 0.285→Class1
x92: 0.761→Class2
x93: 0.006→Class1
x94: 0.337→Class1
x95: 0.343→Class1
x96: 0.086→Class1
x97: 0.037→Class1
x98: 0.551→Class2
x99: 0.275→Class1
x100: 0.008→Class1
No description has been provided for this image
k=2 results:
x51: 0.617→Class2
x52: 0.453→Class1
x53: 0.241→Class1
x54: 0.225→Class1
x55: 0.026→Class1
x56: 0.012→Class1
x57: 0.074→Class1
x58: 0.449→Class1
x59: 0.906→Class2
x60: 0.612→Class2
x61: 0.317→Class1
x62: 0.713→Class2
x63: 0.296→Class1
x64: 0.733→Class2
x65: 0.975→Class2
x66: 0.193→Class1
x67: 0.538→Class2
x68: 0.052→Class1
x69: 0.833→Class2
x70: 0.186→Class1
x71: 0.775→Class2
x72: 0.501→Class1
x73: 0.011→Class1
x74: 0.339→Class1
x75: 0.669→Class2
x76: 0.825→Class2
x77: 0.620→Class2
x78: 0.926→Class2
x79: 0.030→Class1
x80: 0.628→Class2
x81: 0.907→Class2
x82: 0.307→Class1
x83: 0.615→Class2
x84: 0.747→Class2
x85: 0.350→Class1
x86: 0.709→Class2
x87: 0.399→Class1
x88: 0.579→Class2
x89: 0.247→Class1
x90: 0.611→Class2
x91: 0.285→Class1
x92: 0.761→Class2
x93: 0.006→Class1
x94: 0.337→Class1
x95: 0.343→Class1
x96: 0.086→Class1
x97: 0.037→Class1
x98: 0.551→Class2
x99: 0.275→Class1
x100: 0.008→Class1
No description has been provided for this image
k=3 results:
x51: 0.617→Class2
x52: 0.453→Class1
x53: 0.241→Class1
x54: 0.225→Class1
x55: 0.026→Class1
x56: 0.012→Class1
x57: 0.074→Class1
x58: 0.449→Class1
x59: 0.906→Class2
x60: 0.612→Class2
x61: 0.317→Class1
x62: 0.713→Class2
x63: 0.296→Class1
x64: 0.733→Class2
x65: 0.975→Class2
x66: 0.193→Class1
x67: 0.538→Class2
x68: 0.052→Class1
x69: 0.833→Class2
x70: 0.186→Class1
x71: 0.775→Class2
x72: 0.501→Class1
x73: 0.011→Class1
x74: 0.339→Class1
x75: 0.669→Class2
x76: 0.825→Class2
x77: 0.620→Class2
x78: 0.926→Class2
x79: 0.030→Class1
x80: 0.628→Class2
x81: 0.907→Class2
x82: 0.307→Class1
x83: 0.615→Class2
x84: 0.747→Class2
x85: 0.350→Class1
x86: 0.709→Class2
x87: 0.399→Class1
x88: 0.579→Class2
x89: 0.247→Class1
x90: 0.611→Class2
x91: 0.285→Class1
x92: 0.761→Class2
x93: 0.006→Class1
x94: 0.337→Class1
x95: 0.343→Class1
x96: 0.086→Class1
x97: 0.037→Class1
x98: 0.551→Class2
x99: 0.275→Class1
x100: 0.008→Class1
No description has been provided for this image
k=4 results:
x51: 0.617→Class2
x52: 0.453→Class1
x53: 0.241→Class1
x54: 0.225→Class1
x55: 0.026→Class1
x56: 0.012→Class1
x57: 0.074→Class1
x58: 0.449→Class1
x59: 0.906→Class2
x60: 0.612→Class2
x61: 0.317→Class1
x62: 0.713→Class2
x63: 0.296→Class1
x64: 0.733→Class2
x65: 0.975→Class2
x66: 0.193→Class1
x67: 0.538→Class2
x68: 0.052→Class1
x69: 0.833→Class2
x70: 0.186→Class1
x71: 0.775→Class2
x72: 0.501→Class1
x73: 0.011→Class1
x74: 0.339→Class1
x75: 0.669→Class2
x76: 0.825→Class2
x77: 0.620→Class2
x78: 0.926→Class2
x79: 0.030→Class1
x80: 0.628→Class2
x81: 0.907→Class2
x82: 0.307→Class1
x83: 0.615→Class2
x84: 0.747→Class2
x85: 0.350→Class1
x86: 0.709→Class2
x87: 0.399→Class1
x88: 0.579→Class2
x89: 0.247→Class1
x90: 0.611→Class2
x91: 0.285→Class1
x92: 0.761→Class2
x93: 0.006→Class1
x94: 0.337→Class1
x95: 0.343→Class1
x96: 0.086→Class1
x97: 0.037→Class1
x98: 0.551→Class2
x99: 0.275→Class1
x100: 0.008→Class1
No description has been provided for this image
k=5 results:
x51: 0.617→Class2
x52: 0.453→Class1
x53: 0.241→Class1
x54: 0.225→Class1
x55: 0.026→Class1
x56: 0.012→Class1
x57: 0.074→Class1
x58: 0.449→Class1
x59: 0.906→Class2
x60: 0.612→Class2
x61: 0.317→Class1
x62: 0.713→Class2
x63: 0.296→Class1
x64: 0.733→Class2
x65: 0.975→Class2
x66: 0.193→Class1
x67: 0.538→Class2
x68: 0.052→Class1
x69: 0.833→Class2
x70: 0.186→Class1
x71: 0.775→Class2
x72: 0.501→Class1
x73: 0.011→Class1
x74: 0.339→Class1
x75: 0.669→Class2
x76: 0.825→Class2
x77: 0.620→Class2
x78: 0.926→Class2
x79: 0.030→Class1
x80: 0.628→Class2
x81: 0.907→Class2
x82: 0.307→Class1
x83: 0.615→Class2
x84: 0.747→Class2
x85: 0.350→Class1
x86: 0.709→Class2
x87: 0.399→Class1
x88: 0.579→Class2
x89: 0.247→Class1
x90: 0.611→Class2
x91: 0.285→Class1
x92: 0.761→Class2
x93: 0.006→Class1
x94: 0.337→Class1
x95: 0.343→Class1
x96: 0.086→Class1
x97: 0.037→Class1
x98: 0.551→Class2
x99: 0.275→Class1
x100: 0.008→Class1
No description has been provided for this image
k=20 results:
x51: 0.617→Class2
x52: 0.453→Class1
x53: 0.241→Class1
x54: 0.225→Class1
x55: 0.026→Class1
x56: 0.012→Class1
x57: 0.074→Class1
x58: 0.449→Class1
x59: 0.906→Class2
x60: 0.612→Class2
x61: 0.317→Class1
x62: 0.713→Class2
x63: 0.296→Class1
x64: 0.733→Class2
x65: 0.975→Class2
x66: 0.193→Class1
x67: 0.538→Class1
x68: 0.052→Class1
x69: 0.833→Class2
x70: 0.186→Class1
x71: 0.775→Class2
x72: 0.501→Class1
x73: 0.011→Class1
x74: 0.339→Class1
x75: 0.669→Class2
x76: 0.825→Class2
x77: 0.620→Class2
x78: 0.926→Class2
x79: 0.030→Class1
x80: 0.628→Class2
x81: 0.907→Class2
x82: 0.307→Class1
x83: 0.615→Class2
x84: 0.747→Class2
x85: 0.350→Class1
x86: 0.709→Class2
x87: 0.399→Class1
x88: 0.579→Class2
x89: 0.247→Class1
x90: 0.611→Class2
x91: 0.285→Class1
x92: 0.761→Class2
x93: 0.006→Class1
x94: 0.337→Class1
x95: 0.343→Class1
x96: 0.086→Class1
x97: 0.037→Class1
x98: 0.551→Class2
x99: 0.275→Class1
x100: 0.008→Class1
No description has been provided for this image
k=30 results:
x51: 0.617→Class2
x52: 0.453→Class1
x53: 0.241→Class1
x54: 0.225→Class1
x55: 0.026→Class1
x56: 0.012→Class1
x57: 0.074→Class1
x58: 0.449→Class1
x59: 0.906→Class2
x60: 0.612→Class2
x61: 0.317→Class1
x62: 0.713→Class2
x63: 0.296→Class1
x64: 0.733→Class2
x65: 0.975→Class2
x66: 0.193→Class1
x67: 0.538→Class1
x68: 0.052→Class1
x69: 0.833→Class2
x70: 0.186→Class1
x71: 0.775→Class2
x72: 0.501→Class1
x73: 0.011→Class1
x74: 0.339→Class1
x75: 0.669→Class2
x76: 0.825→Class2
x77: 0.620→Class2
x78: 0.926→Class2
x79: 0.030→Class1
x80: 0.628→Class2
x81: 0.907→Class2
x82: 0.307→Class1
x83: 0.615→Class2
x84: 0.747→Class2
x85: 0.350→Class1
x86: 0.709→Class2
x87: 0.399→Class1
x88: 0.579→Class2
x89: 0.247→Class1
x90: 0.611→Class2
x91: 0.285→Class1
x92: 0.761→Class2
x93: 0.006→Class1
x94: 0.337→Class1
x95: 0.343→Class1
x96: 0.086→Class1
x97: 0.037→Class1
x98: 0.551→Class1
x99: 0.275→Class1
x100: 0.008→Class1
No description has been provided for this image
In [17]:
#6th
import numpy as np
import matplotlib.pyplot as plt

# Gaussian kernel-based regression
def predict(x, X, y, tau):
    w = np.exp(-np.sum((X - x)**2, axis=1) / (2 * tau**2))
    W = np.diag(w)
    theta = np.linalg.pinv(X.T @ W @ X) @ X.T @ W @ y
    return x @ theta

# Data
np.random.seed(42)
X = np.linspace(0, 2*np.pi, 100)
y = np.sin(X) + 0.1 * np.random.randn(100)
Xb = np.c_[np.ones_like(X), X]

# Test data
xt = np.linspace(0, 2*np.pi, 200)
xtb = np.c_[np.ones_like(xt), xt]
tau = 0.5
yp = [predict(xi, Xb, y, tau) for xi in xtb]

# Plot
plt.scatter(X, y, c='r', label='Train')
plt.plot(xt, yp, c='b', label=f'LWR tau={tau}')
plt.title('Locally Weighted Regression')
plt.legend()
plt.show()
No description has been provided for this image
In [19]:
#7th
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import make_pipeline
from sklearn.metrics import mean_squared_error, r2_score

# Linear Regression - California Housing
def linear_regression():
    X, y = fetch_california_housing(as_frame=True).data[["AveRooms"]], fetch_california_housing(as_frame=True).target
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    model = LinearRegression().fit(X_train, y_train)
    y_pred = model.predict(X_test)
    plt.scatter(X_test, y_test, label="Actual")
    plt.plot(X_test, y_pred, label="Predicted", color="red")
    plt.title("Linear Regression - California Housing")
    plt.legend()
    plt.show()
    print("MSE:", mean_squared_error(y_test, y_pred), "R^2:", r2_score(y_test, y_pred))

# Polynomial Regression - Auto MPG
def polynomial_regression():
    data = pd.read_csv("https://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data", sep='\s+', names=["mpg", "cylinders", "displacement", "horsepower", "weight", "acceleration", "model_year", "origin"], na_values="?").dropna()
    X, y = data["displacement"].values.reshape(-1, 1), data["mpg"].values
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    model = make_pipeline(PolynomialFeatures(degree=2), LinearRegression()).fit(X_train, y_train)
    y_pred = model.predict(X_test)
    plt.scatter(X_test, y_test, label="Actual")
    plt.scatter(X_test, y_pred, label="Predicted", color="red")
    plt.title("Polynomial Regression - Auto MPG")
    plt.legend()
    plt.show()
    print("MSE:", mean_squared_error(y_test, y_pred), "R^2:", r2_score(y_test, y_pred))

linear_regression()
polynomial_regression()
No description has been provided for this image
MSE: 1.2923314440807299 R^2: 0.013795337532284901
No description has been provided for this image
MSE: 0.7431490557205839 R^2: 0.7505650609469634
In [9]:
#8from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier, plot_tree
import matplotlib.pyplot as plt

# Load and split data
X, y = load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train and predict
model = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)
acc = model.score(X_test, y_test)
print(f"Model Accuracy: {acc * 100:.2f}%")

# Predict a single sample
pred = model.predict([X_test[0]])[0]
print("Predicted Class for the new sample:", "Benign" if pred else "Malignant")

# Plot tree
plt.figure(figsize=(12, 8))
plot_tree(model, filled=True)
plt.title("Decision Tree - Breast Cancer Dataset")
plt.show()
Model Accuracy: 94.74%
Predicted Class for the new sample: Benign
No description has been provided for this image
In [20]:
import numpy as np
from sklearn.datasets import fetch_olivetti_faces
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import matplotlib.pyplot as plt

data = fetch_olivetti_faces(shuffle=True, random_state=42)
X = data.data
y = data.target

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

gnb = GaussianNB()
gnb.fit(X_train, y_train)
y_pred = gnb.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy * 100:.2f}%')

print("\nClassification Report:")
print(classification_report(y_test, y_pred, zero_division=1))

print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred))

cross_val_accuracy = cross_val_score(gnb, X, y, cv=5, scoring='accuracy')
print(f'\nCross-validation accuracy: {cross_val_accuracy.mean() * 100:.2f}%')

fig, axes = plt.subplots(3, 5, figsize=(12, 8))
for ax, image, label, prediction in zip(axes.ravel(), X_test, y_test, y_pred):
    ax.imshow(image.reshape(64, 64), cmap=plt.cm.gray)
    ax.set_title(f"True: {label}, Pred: {prediction}")
    ax.axis('off')

plt.show()
Accuracy: 80.83%

Classification Report:
              precision    recall  f1-score   support

           0       0.67      1.00      0.80         2
           1       1.00      1.00      1.00         2
           2       0.33      0.67      0.44         3
           3       1.00      0.00      0.00         5
           4       1.00      0.50      0.67         4
           5       1.00      1.00      1.00         2
           7       1.00      0.75      0.86         4
           8       1.00      0.67      0.80         3
           9       1.00      0.75      0.86         4
          10       1.00      1.00      1.00         3
          11       1.00      1.00      1.00         1
          12       0.40      1.00      0.57         4
          13       1.00      0.80      0.89         5
          14       1.00      0.40      0.57         5
          15       0.67      1.00      0.80         2
          16       1.00      0.67      0.80         3
          17       1.00      1.00      1.00         3
          18       1.00      1.00      1.00         3
          19       0.67      1.00      0.80         2
          20       1.00      1.00      1.00         3
          21       1.00      0.67      0.80         3
          22       1.00      0.60      0.75         5
          23       1.00      0.75      0.86         4
          24       1.00      1.00      1.00         3
          25       1.00      0.75      0.86         4
          26       1.00      1.00      1.00         2
          27       1.00      1.00      1.00         5
          28       0.50      1.00      0.67         2
          29       1.00      1.00      1.00         2
          30       1.00      1.00      1.00         2
          31       1.00      0.75      0.86         4
          32       1.00      1.00      1.00         2
          34       0.25      1.00      0.40         1
          35       1.00      1.00      1.00         5
          36       1.00      1.00      1.00         3
          37       1.00      1.00      1.00         1
          38       1.00      0.75      0.86         4
          39       0.50      1.00      0.67         5

    accuracy                           0.81       120
   macro avg       0.89      0.85      0.83       120
weighted avg       0.91      0.81      0.81       120


Confusion Matrix:
[[2 0 0 ... 0 0 0]
 [0 2 0 ... 0 0 0]
 [0 0 2 ... 0 0 1]
 ...
 [0 0 0 ... 1 0 0]
 [0 0 0 ... 0 3 0]
 [0 0 0 ... 0 0 5]]

Cross-validation accuracy: 87.25%
No description has been provided for this image
In [21]:
import numpy as np
from sklearn.datasets import fetch_olivetti_faces
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import matplotlib.pyplot as plt

# Load data
X, y = fetch_olivetti_faces(shuffle=True, random_state=42).data, fetch_olivetti_faces().target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Train Naive Bayes
gnb = GaussianNB().fit(X_train, y_train)
y_pred = gnb.predict(X_test)

# Metrics
print(f'Accuracy: {accuracy_score(y_test, y_pred) * 100:.2f}%')
print("\nClassification Report:\n", classification_report(y_test, y_pred, zero_division=1))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))
print(f'\nCross-validation accuracy: {cross_val_score(gnb, X, y, cv=5).mean() * 100:.2f}%')

# Plot sample images
fig, axes = plt.subplots(3, 5, figsize=(12, 8))
for ax, img, label, pred in zip(axes.ravel(), X_test, y_test, y_pred):
    ax.imshow(img.reshape(64, 64), cmap='gray')
    ax.set_title(f"True: {label}, Pred: {pred}")
    ax.axis('off')

plt.show()
Accuracy: 3.33%

Classification Report:
               precision    recall  f1-score   support

           0       0.00      0.00      0.00         5
           1       0.00      0.00      0.00         4
           2       0.00      0.00      0.00         2
           3       0.00      0.00      0.00         4
           4       0.00      0.00      0.00         3
           5       0.00      0.00      0.00         3
           6       0.00      0.00      0.00         3
           7       1.00      0.00      0.00         8
           8       0.20      0.50      0.29         2
           9       1.00      0.00      0.00         3
          10       1.00      0.00      0.00         3
          11       0.50      0.20      0.29         5
          12       0.00      0.00      0.00         2
          13       0.00      0.00      0.00         3
          14       0.00      0.00      0.00         3
          15       0.00      0.00      0.00         3
          16       0.00      1.00      0.00         0
          17       0.00      0.00      0.00         3
          18       0.00      0.00      0.00         2
          19       0.00      0.00      0.00         1
          20       0.00      0.00      0.00         2
          21       0.00      0.00      0.00         1
          22       0.00      0.00      0.00         4
          23       0.12      0.25      0.17         4
          24       0.00      0.00      0.00         3
          25       0.00      0.00      0.00         2
          26       0.11      0.25      0.15         4
          27       1.00      0.00      0.00         3
          28       1.00      0.00      0.00         4
          29       0.00      0.00      0.00         3
          30       0.00      0.00      0.00         2
          31       1.00      0.00      0.00         1
          32       0.00      0.00      0.00         3
          33       0.00      0.00      0.00         2
          34       0.00      0.00      0.00         2
          35       0.00      0.00      0.00         2
          36       0.00      0.00      0.00         2
          37       0.00      0.00      0.00         3
          38       1.00      0.00      0.00         7
          39       0.00      0.00      0.00         4

    accuracy                           0.03       120
   macro avg       0.20      0.06      0.02       120
weighted avg       0.27      0.03      0.03       120


Confusion Matrix:
 [[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 1 0 0]
 [0 0 1 ... 0 0 0]]

Cross-validation accuracy: 4.00%
No description has been provided for this image
In [ ]: